This R Markdown script contains all the code used for outlier detection, data analysis, and plotting. It also includes additional statistical analyses, a description of the Monte Carlo simulations, and all the statistical models with their summaries.
# Loading the data to filter out participants
# Exp1 exclusion pipeline. Each step collects the workerIds to drop into an
# rmParticipant<k>Exp1 vector and then overwrites dataExp1 without them, so every
# later criterion is evaluated only on participants who survived the earlier steps.
# The IDs in the trailing comments are the participants recorded as removed.
dataExp1 <- read.delim("./dataNotFilteredExp1.txt", sep ="\t", header = TRUE)
# Remove one participant who did 192 items instead of 303
# (count the responses per workerId; any count other than 303 is excluded)
nbResponse <- aggregate(dataExp1$enteredResponse, by=list(dataExp1$workerId), FUN=length)
rmParticipant1Exp1 <- nbResponse[!nbResponse$x %in% c(303),]$Group.1 # CEA6BA7F432A7DD9F3352BD5AF605FED
dataExp1 <- dataExp1[!dataExp1$workerId %in% rmParticipant1Exp1,]
# Remove three participants who gave ratings lower than 4 to 3 most common Māori words borrowed into New Zealand English
# (a single rating below 4 on any one of the three control words is enough to be excluded)
list_carrot <- c("haka","kai","aotearoa")
carrots <- dataExp1[dataExp1$word %in% list_carrot,]
carrots <- carrots[as.numeric(carrots$enteredResponse) < 4,]
rmParticipant2Exp1 <- unique(carrots$workerId) # C2172ACC5CE7A076DF4AD48672826D17 CF5B1FC9D9FDDE28DABFB8B10F60F2A8 F317F5E5817BC9CA03FFA3D0AEE15BCC
dataExp1 <- dataExp1[!dataExp1$workerId %in% rmParticipant2Exp1,]
# Remove ratings for the three words (used to detect outliers)
# (the same three words as list_carrot, spelled out again here)
dataExp1 <- dataExp1[!dataExp1$word %in% c("haka","kai","aotearoa"),]
# Remove two participants whose speakMaori or compMaori is at least (equal to or above) 3
# (either score reaching 3 is sufficient: the two conditions are combined with |)
rmParticipant3Exp1 <- unique(dataExp1[dataExp1$speakMaori >= 3 | dataExp1$compMaori >= 3,]$workerId) # 0CAEF0D07D102126BADE4B5F353F3549 FFF87DE6A3C8DCD73B120BD06C3EB966
dataExp1 <- dataExp1[!dataExp1$workerId %in% rmParticipant3Exp1,]
# Remove one participant who did not learn their English in NZ and have been living in their current location in NZ for less than ten years (duration == "short")
# summaryExp1WorkerId keeps one row per distinct (workerId, firstLangCountry, place, duration) combination.
summaryExp1WorkerId <- unique(dataExp1[,c("workerId","firstLangCountry","place","duration")])
# `!` binds more loosely than `==`, so the next line reads !(firstLangCountry == "NZ").
EngNotInNZExp1 <- summaryExp1WorkerId[!summaryExp1WorkerId$firstLangCountry=="NZ",]
rmParticipant4Exp1 <- unique(EngNotInNZExp1[EngNotInNZExp1$duration=="short",]$workerId) # 31F2039C03219AE69C49AC468F37CA78
dataExp1 <- dataExp1[!dataExp1$workerId %in% rmParticipant4Exp1,]
# Remove two participants who know any other Polynesian languages
rmParticipant5Exp1 <- unique(dataExp1[dataExp1$anyPolynesian=="Yes",]$workerId) # 2B474DC72567F38FD065757F11211DFD 2B92687E2CE8ABD8047732400DFDC424
dataExp1 <- dataExp1[!dataExp1$workerId %in% rmParticipant5Exp1,]
# Remove five participants with language impairments
rmParticipant6Exp1 <- unique(dataExp1[dataExp1$impairments=="Yes",]$workerId)
# 1DD4DB335A0E2735A9AE2AD898AAEFD5 2C40866C3268DB45274C535937D3DA24 933DFF677472628574F9DB2874121FD9 D42298BA8985A7C8E6232CCFC8E99A31 F4599D280C20128B3F6E804C0D2726B0
dataExp1 <- dataExp1[!dataExp1$workerId %in% rmParticipant6Exp1,]
# Remove one participant whose pattern of responses (SD) is below 2SD of the mean of all participants
# SD holds one standard deviation of enteredResponse per remaining participant;
# `cut` is the lower threshold (mean of those SDs minus twice their SD).
SD <- aggregate(dataExp1$enteredResponse, by=list(dataExp1$workerId), sd)
cut <- mean(SD$x)-2*sd(SD$x)
# `!SD$x > cut` is parsed as !(SD$x > cut), i.e. participants at or below the threshold.
rmParticipant7Exp1 <- SD[!SD$x > cut,]$Group.1 # B855CE4A15710AA41194B3D81EE68187
dataExp1 <- dataExp1[!dataExp1$workerId %in% rmParticipant7Exp1,]
# Check the total number of usable participants for Exp1
# length(unique(dataExp1$workerId)) # 85
The data is structured as follows:
This figure summarizes the distribution of participants on demographic and linguistic axes.
Fig. S1: Overview of participants’ sociolinguistic profile in Exp1.
# Table S1: linear mixed-effects model of the Exp1 ratings.
# The model is not refitted here. The commented call below is kept for
# reference (presumably it produced the cached .rds file that is loaded).
# modelTableS1 <- lmer(
#   enteredResponse ~ c.(length)*c.(score) + type*bin +
#     (1 + c.(length)*c.(score) + type*bin|workerId) + (1|word),
#   control=lmerControl(optimizer="bobyqa"), dataExp1)
# saveRDS(modelTableS1, file = "modelTableS1.rds")
modelTableS1 <- readRDS("./modelTableS1.rds")
# Render the fixed-effects coefficient table, rounded to three digits.
kable(
  xtable(summary(modelTableS1)$coefficients),
  digits = 3,
  caption = "Table S1: Linear mixed-effects model of NMS confidence ratings across word frequency. All numeric variables in this model are centered."
)
| | Estimate | Std. Error | df | t value | Pr(>\|t\|) |
|---|---|---|---|---|---|
| (Intercept) | 2.913 | 0.058 | 195.849 | 49.916 | 0.000 |
| c.(length) | 0.000 | 0.009 | 169.408 | 0.008 | 0.994 |
| c.(score) | 0.337 | 0.056 | 257.974 | 6.013 | 0.000 |
| typereal | 0.550 | 0.071 | 325.344 | 7.740 | 0.000 |
| bin | -0.017 | 0.012 | 972.574 | -1.387 | 0.166 |
| c.(length):c.(score) | 0.061 | 0.014 | 660.395 | 4.532 | 0.000 |
| typereal:bin | -0.041 | 0.018 | 483.709 | -2.261 | 0.024 |
Fig. S2: Interaction effect plots: Fig. S2a (left) shows the interaction between frequency bin and lexicality (real words vs. nonwords); Fig. S2b (right) shows the interaction between phonotactic score and length of stimuli.
Fig. 1: Mean well-formedness ratings for real words and nonwords by frequency bin. Bin1 contains the most frequent words and Bin5 consists of the least frequent words. Horizontal lines show the model effect estimates for real words vs. nonwords across all frequencies.
Fig. S3: Mean rating vs. phonotactic score for each stimulus for real words and nonwords by frequency bin.
During the analysis of Exp1, we found an overall bias towards higher phonotactic scores for Māori words and lower phonotactic scores for Māori-like nonwords (see Fig. S3). This pattern is caused by a lack of nonwords that phonotactically matched the randomly selected words from the Māori RS corpora. To solve this problem, nonwords and words were rematched across participants by finding, for each nonword, the phonotactically matched word with the lowest absolute difference in phonotactic score. Among the rematched pairs, only those whose absolute difference is below 0.3 were kept. In the initial dataset, there are 200 pairs in each bin. After rematching the stimuli, the number of pairs in each bin is reduced as follows: 71 (Bin1), 52 (Bin2 and Bin3), 53 (Bin4), 56 (Bin5).
Fig. S4: Mean rating and phonotactic score for each stimulus per frequency bin (rematched pairs).
# Table S2: word vs. nonword model for the rematched pairs.
# Fitting code kept for reference only; the fitted object is read from disk.
# modelTableS2 <- lmer(
#   enteredResponse ~ type + (1 + type|workerId) + (1|word),
#   control=lmerControl(optimizer="bobyqa"), dataRematched)
# saveRDS(modelTableS2, file = "modelTableS2.rds")
modelTableS2 <- readRDS("./modelTableS2.rds")
# Render the fixed-effects coefficient table, rounded to three digits.
kable(
  xtable(summary(modelTableS2)$coefficients),
  digits = 3,
  caption = "Table S2: Linear mixed-effects model of NMS confidence ratings (for rematched pairs)."
)
| | Estimate | Std. Error | df | t value | Pr(>\|t\|) |
|---|---|---|---|---|---|
| (Intercept) | 2.809 | 0.055 | 172.783 | 50.932 | 0 |
| typereal | 0.410 | 0.054 | 367.196 | 7.546 | 0 |
Within each frequency bin containing the rematched pairs, the distinction between nonwords and words is statistically significant in separate linear mixed-effects models for each bin: NMS give higher ratings to words than nonwords across all the five bins. When the ratings of all the bins are modeled in a single linear mixed-effects model, the distinction between words and nonwords is the only significant predictor; other predictors such as length, phonotactic score, and bin are systematically removed due to a lack of convergence or statistical insignificance. This result demonstrates that NMS’ ability to distinguish between words and nonwords is not an artefact of their phonotactic knowledge.
# Loading the data to filter out participants:
# The Exp2 file holds several participant groups, distinguished by the `group`
# column; this section filters the rows with group == "MS". Each step stores the
# workerIds to drop in an rmParticipant<k> vector and overwrites dataMS without
# them, so later criteria only see participants who passed the earlier ones.
dataNotFiltered <- read.delim("./dataNotFilteredExp2.txt", sep ="\t", header = TRUE)
# Part 1: Removing unusable MS participants
dataMS <- dataNotFiltered[dataNotFiltered$group=="MS",]
# Remove one participant who did 285 items instead of 320 (length 5)
# (response counts of 320 or 240 are both accepted; anything else is excluded)
nbResponse <- aggregate(dataMS$enteredResponse, by=list(dataMS$workerId), FUN=length)
rmParticipant1 <- nbResponse[!nbResponse$x %in% c(320,240),]$Group.1 #634726494EE0F397CE0F525B97FA6B76
dataMS <- dataMS[!dataMS$workerId %in% rmParticipant1,]
# Remove two participants whose speakMaori or compMaori is below 3
# (either score below 3 is sufficient: the two conditions are combined with |)
rmParticipant2 <- unique(dataMS[dataMS$speakMaori <3 | dataMS$compMaori < 3,]$workerId)
#83D25CDFA45FC1A34F4A4176EC7626B3 B193DE3B787FEC86E6CCB3F82F9812E9
dataMS <- dataMS[!dataMS$workerId %in% rmParticipant2,]
# Remove one participant with language impairments
rmParticipant3 <- unique(dataMS[dataMS$impairments=="Yes",]$workerId) #6C7F9A4676E026CD0E3056D8DA19E507
dataMS <- dataMS[!dataMS$workerId %in% rmParticipant3,]
# Detect participants whose median reactionTime is shorter than 2*SD below the mean of all MS
# The per-participant median is also merged into dataMS as a `median` column.
median_RT <- aggregate(dataMS$reactionTime, by=list(dataMS$workerId), median)
names(median_RT) <- c("workerId","median");dataMS <- merge(dataMS,median_RT,by="workerId")
cut <- mean(median_RT$median)-2*sd(median_RT$median)
# The check itself is left commented out: nobody fell at or below the threshold,
# so no participant is removed by this step.
# median_RT[!median_RT$median > cut,]$workerId # None detected!
# Remove one participant whose pattern of responses (SD) is below 2SD of the mean of all MS participants
# SD holds one standard deviation of enteredResponse per remaining participant;
# `cut` is reused for the new lower threshold (mean of those SDs minus twice their SD).
SD <- aggregate(dataMS$enteredResponse, by=list(dataMS$workerId), sd)
cut <- mean(SD$x)-2*sd(SD$x)
# `!SD$x > cut` is parsed as !(SD$x > cut), i.e. participants at or below the threshold.
rmParticipant4 <- SD[!SD$x > cut,]$Group.1 #B33F8EB38F232BB78667404E1D96025B
dataMS <- dataMS[!dataMS$workerId %in% rmParticipant4,]
# Check the total number of usable MS participants
# length(unique(dataMS$workerId)) #40
# Part 2: Removing unusable NMS participants
# Same sequential scheme as the other groups: each step stores the workerIds to
# drop in an rmParticipant<k> vector and overwrites dataNMS without them, so the
# order of the steps determines who is evaluated by each criterion.
dataNMS <- dataNotFiltered[dataNotFiltered$group=="NMS",]
# Remove one participant who did 436 items instead of 240 (length 4)
# (response counts of 320 or 240 are both accepted; anything else is excluded)
nbResponse <- aggregate(dataNMS$enteredResponse, by=list(dataNMS$workerId), FUN=length)
rmParticipant5 <- nbResponse[!nbResponse$x %in% c(320,240),]$Group.1 # A7017E9F3DD1CC6778A3BC47958CA9B7
dataNMS <- dataNMS[!dataNMS$workerId %in% rmParticipant5,]
# Remove fourteen participants whose speakMaori or compMaori is at least (equal to or above) 3
# (either score reaching 3 is sufficient: the two conditions are combined with |)
rmParticipant6 <- unique(dataNMS[dataNMS$speakMaori >= 3 | dataNMS$compMaori >= 3,]$workerId) # 04210C8909800F4041D775C8AE24720D 1E95A51880B3908528596F1CDC142382 1F90388DD2A6052379B3400E7E075B7B 2AABDB6A588C0A38D4D18DCCE2E411E0 3116B3E848662645051AE5AAA5D5E696 349856A9453BF2B1F7A026618E5F43D5 42564FDD3FE2B481B37DE8120E0BBB7C 5DBE9FB11F0E1FF4E5DA1149581576D0 90F93AC5E06AE66B73BFF3B46023FE3C A4B98DB8F5C9298E9FBF865FEB782486 B33F8EB38F232BB78667404E1D96025B C6243B5C4607EF24FD7F4D70B4CD0F41 C6C55B2283D35484E19BE5F4283F64D1 EB57DF86D41FFA1AD74E282B72D82856
dataNMS <- dataNMS[!dataNMS$workerId %in% rmParticipant6,]
# Remove ten participants who did not learn their English in NZ and have been living in their current location in NZ for less than ten years (duration == "short")
# summaryNMSWorkerId keeps one row per distinct (workerId, firstLangCountry, place, duration) combination.
summaryNMSWorkerId <- unique(dataNMS[,c("workerId","firstLangCountry","place","duration")])
# `!` binds more loosely than `==`, so the next line reads !(firstLangCountry == "NZ").
EngNotInNZ <- summaryNMSWorkerId[!summaryNMSWorkerId$firstLangCountry=="NZ",]
rmParticipant7 <- unique(EngNotInNZ[EngNotInNZ$duration=="short",]$workerId) # 21C1D24B57F6157A628A04DC5A519135 2AB61EA1EEBB7193A3A178D5B819225E 37A91EA9A4A92600B4ECEA239DC5D149 422774F5956A4A43258293A27B497995 4894A6F7674ECBFB304BCA4D9FB11B05 90FBA7311D5EB571D3C88425E4D371C0 E4E762BA8ADADA1CB0D943F8131F09B1 E862C480A8CE650797BE9D7309E52E15 FD2E69EFA7B5138BA6CC0BA31A10BFF0 FFA73BE62F1F21F94F35BC49A9B74838
dataNMS <- dataNMS[!dataNMS$workerId %in% rmParticipant7,]
# Remove one participant who has lived in Hawaii
rmParticipant8 <- unique(dataNMS[dataNMS$hawaii=="Yes",]$workerId) #6C3FBC26E49853CA97CFEB1DC3725C94
dataNMS <- dataNMS[!dataNMS$workerId %in% rmParticipant8,]
# Remove four participants who know any other Polynesian languages
rmParticipant9 <- unique(dataNMS[dataNMS$anyPolynesian=="Yes",]$workerId) # 84B5591229AA4FDE2C154B7873BCCC1E 905775C26C73AF3648FDAF216C1C836B B64C60A9F8121A22B1431FD012FE1BC1 EA6C551EB61BD1B4A3907ADCA5AC8A8C
dataNMS <- dataNMS[!dataNMS$workerId %in% rmParticipant9,]
# Remove three participants with language impairments
rmParticipant10 <- unique(dataNMS[dataNMS$impairments=="Yes",]$workerId) # 080658104D5BE0B7E5FDEDA8BCE32CED 504C5E7063935EB524BEC50FE180D24A CAA87C53B60F941BE5F4B3F4093B33D6
dataNMS <- dataNMS[!dataNMS$workerId %in% rmParticipant10,]
# Remove one participant whose median reactionTime is shorter than 2*SD below the mean of all NMS
# The per-participant median is also merged into dataNMS as a `median` column.
median_RT <- aggregate(dataNMS$reactionTime, by=list(dataNMS$workerId), median)
names(median_RT) <- c("workerId","median");dataNMS <- merge(dataNMS,median_RT,by="workerId")
cut <- mean(median_RT$median)-2*sd(median_RT$median)
# `!median_RT$median > cut` is parsed as !(median > cut), i.e. medians at or below the threshold.
rmParticipant11 <- median_RT[!median_RT$median > cut,]$workerId # 0CC130754E535B995C330A62C70ABB29
dataNMS <- dataNMS[!dataNMS$workerId %in% rmParticipant11,]
# Remove four participants whose pattern of responses (SD) is below 2SD of the mean of all NMS participants
# SD holds one standard deviation of enteredResponse per remaining participant;
# `cut` is reused for the new lower threshold (mean of those SDs minus twice their SD).
SD <- aggregate(dataNMS$enteredResponse, by=list(dataNMS$workerId), sd)
cut <- mean(SD$x)-2*sd(SD$x)
rmParticipant12 <- SD[!SD$x > cut,]$Group.1 # 23F8C303E5981DC46B3998C680DF6DFE 3ADCE0E04FE200BA98BC5C3015AF9092 90E5DE278473C80C5C9B1F50642DA2D5 CA1853385689F12642034089CDBFBAE3
dataNMS <- dataNMS[!dataNMS$workerId %in% rmParticipant12,]
# Check the total number of usable NMS participants
# length(unique(dataNMS$workerId)) #113
# Part 3: Removing unusable US participants
# Same sequential scheme as the other groups: each step stores the workerIds to
# drop in an rmParticipant<k> vector and overwrites dataUS without them, so the
# order of the steps determines who is evaluated by each criterion.
dataUS <- dataNotFiltered[dataNotFiltered$group=="US",]
# Remove one participant who did 248 items instead of 240 (length 3)
# (response counts of 320 or 240 are both accepted; anything else is excluded)
nbResponse <- aggregate(dataUS$enteredResponse, by=list(dataUS$workerId), FUN=length)
rmParticipant13 <- nbResponse[!nbResponse$x %in% c(320,240),]$Group.1 # A9692Y27LBXT9
dataUS <- dataUS[!dataUS$workerId %in% rmParticipant13,]
# Remove fourteen participants whose speakMaori or compMaori is above 0
# (either score above 0 is sufficient: the two conditions are combined with |)
rmParticipant14 <- unique(dataUS[dataUS$speakMaori > 0 | dataUS$compMaori > 0,]$workerId) # A1AWHHBTJF1XCH A1L92Y6VBTRFP5 A1YAVTFZ9BULVV A28AX4H70DPKKK A2AMI7BVALOCJP A2DVRTL0JB0GT0 A2N0C5I6ZHNWV A2O5J0420C1UPC A2O762LZEZN3JC A4T642D9Z2UYC AGVIHLR1DX7 AMKEGABQMZYIK AMW2XLD9443OH AWJUGWPCUGKEG
dataUS <- dataUS[!dataUS$workerId %in% rmParticipant14,]
# Remove six participants whose maoriList is above 0
rmParticipant15 <- unique(dataUS[dataUS$maoriList > 0,]$workerId) # A19R7FSNDW6WRK A21KGSK2BDVWOA A2XOYCKJJ4PYZA A3ET9X8TXMGXDI A3V8C77FN4FLUA AP4FDDWBJW47O
dataUS <- dataUS[!dataUS$workerId %in% rmParticipant15,]
# Remove one participant who did not learn English in the US
# (`!` binds more loosely than `==`, so the condition reads !(firstLangCountry == "US"))
rmParticipant16 <- unique(dataUS[!dataUS$firstLangCountry=="US",]$workerId) # A33MBBGYASZPKN
dataUS <- dataUS[!dataUS$workerId %in% rmParticipant16,]
# Remove three participants who have been to Hawaii
rmParticipant17 <- unique(dataUS[dataUS$hawaii=="Yes",]$workerId) # A1QLYBRMIULAYC A3L0XKGZL9P2ZC ADY44EV3GCW85
dataUS <- dataUS[!dataUS$workerId %in% rmParticipant17,]
# Remove one participant with language impairments
rmParticipant18 <- unique(dataUS[dataUS$impairments=="Yes",]$workerId) # A1NUKP1UCT2R2R
dataUS <- dataUS[!dataUS$workerId %in% rmParticipant18,]
# Detect participants whose median reactionTime is shorter than 2*SD below the mean of all US
# The per-participant median is also merged into dataUS as a `median` column.
median_RT <- aggregate(dataUS$reactionTime, by=list(dataUS$workerId), median)
names(median_RT) <- c("workerId","median");dataUS <- merge(dataUS,median_RT,by="workerId")
cut <- mean(median_RT$median)-2*sd(median_RT$median)
# The check itself is left commented out: nobody fell at or below the threshold,
# so no participant is removed by this step.
# median_RT[!median_RT$median > cut,]$workerId # None detected!
# Remove one participant whose pattern of responses (SD) is below 2SD of the mean of all US participants
# SD holds one standard deviation of enteredResponse per remaining participant;
# `cut` is reused for the new lower threshold (mean of those SDs minus twice their SD).
SD <- aggregate(dataUS$enteredResponse, by=list(dataUS$workerId), sd)
cut <- mean(SD$x)-2*sd(SD$x)
# `!SD$x > cut` is parsed as !(SD$x > cut), i.e. participants at or below the threshold.
rmParticipant19 <- SD[!SD$x > cut,]$Group.1 # A1UHNDB2EQ8TK0
dataUS <- dataUS[!dataUS$workerId %in% rmParticipant19,]
# Check the total number of usable US participants
# length(unique(dataUS$workerId)) #94
# Stack the three filtered groups into the Exp2 analysis data set.
# NOTE(review): rbind.fill is not base R (presumably plyr::rbind.fill) - confirm the package is attached.
dataExp2 <- rbind.fill(dataMS, dataNMS, dataUS)
The data is structured as follows:
Fig. S5: Participants’ basic Māori knowledge and proficiency in Exp2.
Fig. S6: Overview of participants’ profile in Exp2.
Fig. S7: Overview of participants’ profile in Exp2.
| length | MS | NMS | US |
|---|---|---|---|
| 3 | 8 | 18 | 16 |
| 4 | 5 | 17 | 13 |
| 5 | 6 | 20 | 16 |
| 6 | 7 | 21 | 16 |
| 7 | 9 | 18 | 14 |
| 8 | 5 | 19 | 19 |
# Model for Table S4
# Make NMS the reference level of `group`, so the group terms in the model are
# contrasts against NMS.
# relevel() only accepts factors, and read.delim() returns character columns by
# default since R 4.0, so the column is coerced first. as.factor() returns an
# existing factor unchanged, so this is safe on older R as well.
dataExp2$group <- relevel(as.factor(dataExp2$group), ref = "NMS")
# The model is not refitted here. The commented call below is kept for
# reference (presumably it produced the cached .rds file that is loaded).
# modelTableS4 <- lmer(
#   enteredResponse ~ c.(scoreDict)*group + c.(scoreRs)*group + c.(scoreUnseg)*group +
#     (1 + c.(scoreDict) + c.(scoreRs) + c.(scoreUnseg)|workerId) + (1 + group|word),
#   control=lmerControl(optimizer="bobyqa"), dataExp2)
# saveRDS(modelTableS4, file = "modelTableS4.rds")
modelTableS4 <- readRDS("./modelTableS4.rds")
# Render the fixed-effects coefficient table, rounded to three digits.
kable(
  xtable(summary(modelTableS4)$coefficients),
  digits = 3,
  caption = "Table S4: Mixed effects model summary for well-formedness ratings by participant group. All numeric variables in this model are centered."
)
| | Estimate | Std. Error | df | t value | Pr(>\|t\|) |
|---|---|---|---|---|---|
| (Intercept) | 2.798 | 0.052 | 279.354 | 53.625 | 0.000 |
| c.(scoreDict) | 0.640 | 0.069 | 530.620 | 9.312 | 0.000 |
| groupMS | 0.006 | 0.099 | 246.339 | 0.057 | 0.955 |
| groupUS | 0.327 | 0.076 | 256.044 | 4.315 | 0.000 |
| c.(scoreRs) | 0.144 | 0.029 | 1303.637 | 4.997 | 0.000 |
| c.(scoreUnseg) | -0.161 | 0.026 | 964.703 | -6.263 | 0.000 |
| c.(scoreDict):groupMS | 0.025 | 0.109 | 242.850 | 0.227 | 0.821 |
| c.(scoreDict):groupUS | -0.683 | 0.089 | 322.623 | -7.680 | 0.000 |
| groupMS:c.(scoreRs) | 0.000 | 0.032 | 307.581 | 0.015 | 0.988 |
| groupUS:c.(scoreRs) | -0.091 | 0.031 | 676.398 | -2.929 | 0.004 |
| groupMS:c.(scoreUnseg) | 0.062 | 0.031 | 233.161 | 1.981 | 0.049 |
| groupUS:c.(scoreUnseg) | 0.132 | 0.029 | 457.585 | 4.563 | 0.000 |
Fig. 2: Interaction between phonotactic scores and participant groups. The range of phonotactic score is represented on the x-axis and the range of predicted rating is represented on the y-axis.
# Example of R code to randomly sample 1000 words from the Māori dictionary
# Draws N samples of m dictionary entries and writes each sample to its own
# text file, "randomization_<i>.txt", one coded word per line.
N <- 1000
m <- 1000
set.seed(1234)
for (i in seq_len(N)) {
  randomWords <- sample(maoriDictionary, m)
  # NOTE(review): orthoCoding is defined elsewhere (presumably ndl::orthoCoding,
  # here asked for trigrams); its "_" separators are replaced by spaces.
  randomWordsTri <- gsub("_", " ", orthoCoding(randomWords, g = c(3)))
  outputFile <- paste0("randomization_", i, ".txt")
  write.table(
    randomWordsTri,
    outputFile,
    row.names = FALSE,
    col.names = FALSE,
    quote = FALSE
  )
}
# Example of R code to run linear mixed-effects models with 1000 samples
# For each of the 1000 data sets in `files`, fit the same model and keep the
# t-value of the c.(score) fixed effect; listLmerfitFinal is the vector of those
# 1000 t-values.
# The list is preallocated instead of being grown one element at a time.
listLmerfit <- vector("list", 1000)
for (i in seq_len(1000)) {
  lmerfit <- lmer(enteredResponse ~ c.(score) + (1 + c.(score)|workerId) + (1|word), data = files[[i]])
  # Select the statistic by row and column name rather than by position [2,4]:
  # column 4 is "t value" only when the coefficient table carries a df column
  # (as in the lmerTest-style tables in this document); without it the table
  # has three columns and [2,4] is out of bounds.
  listLmerfit[[i]] <- coef(summary(lmerfit))["c.(score)", "t value"]
  message('Regression model of', i,'\n')
}
listLmerfitFinal <- unlist(listLmerfit)
Fig. 3: Monte Carlo simulations with 1,000 random samples over 15 dictionary sizes.
Figure 6: Density plot of t-values for frequency-weighted random samples
The following predictors are added individually to the linear mixed-effects model presented in Table S4 and Fig. 2.
# (a) Phonotactics derived from the dictionary of Māori words in New Zealand English (Macalister, 2005)
# The model is not refitted here. The commented call below is kept for
# reference (presumably it produced the cached .rds file that is loaded).
# modelMāoriBorrowings <- lmer(
#   enteredResponse ~ c.(scoreDictNze)*group + c.(scoreDict)*group + c.(scoreRs)*group + c.(scoreUnseg)*group +
#     (1 + c.(scoreDictNze) + c.(scoreDict) + c.(scoreRs) + c.(scoreUnseg)|workerId) + (1+group|word),
#   control=lmerControl(optimizer="bobyqa"), dataExp2)
# saveRDS(modelMāoriBorrowings, file = "modelMāoriBorrowings.rds")
modelMāoriBorrowings <- readRDS("./modelMāoriBorrowings.rds")
# Render the fixed-effects coefficient table, rounded to three digits.
kable(
  xtable(summary(modelMāoriBorrowings)$coefficients),
  digits = 3,
  caption = "Table S5: Linear mixed-effects model of well-formedness ratings including the phonotactics derived from the dictionary of Māori words in New Zealand English"
)
| | Estimate | Std. Error | df | t value | Pr(>\|t\|) |
|---|---|---|---|---|---|
| (Intercept) | 2.798 | 0.052 | 279.213 | 53.627 | 0.000 |
| c.(scoreDictNze) | 0.434 | 0.162 | 1356.624 | 2.678 | 0.008 |
| groupMS | 0.005 | 0.099 | 246.335 | 0.047 | 0.962 |
| groupUS | 0.326 | 0.076 | 256.062 | 4.311 | 0.000 |
| c.(scoreDict) | 0.483 | 0.094 | 742.091 | 5.155 | 0.000 |
| c.(scoreRs) | 0.132 | 0.029 | 1303.639 | 4.542 | 0.000 |
| c.(scoreUnseg) | -0.157 | 0.026 | 966.223 | -6.093 | 0.000 |
| c.(scoreDictNze):groupMS | -0.169 | 0.176 | 324.977 | -0.963 | 0.336 |
| c.(scoreDictNze):groupUS | -0.240 | 0.173 | 727.496 | -1.382 | 0.167 |
| groupMS:c.(scoreDict) | 0.088 | 0.135 | 255.346 | 0.649 | 0.517 |
| groupUS:c.(scoreDict) | -0.597 | 0.115 | 391.737 | -5.193 | 0.000 |
| groupMS:c.(scoreRs) | 0.005 | 0.032 | 307.499 | 0.142 | 0.887 |
| groupUS:c.(scoreRs) | -0.084 | 0.031 | 679.990 | -2.686 | 0.007 |
| groupMS:c.(scoreUnseg) | 0.061 | 0.031 | 234.060 | 1.950 | 0.052 |
| groupUS:c.(scoreUnseg) | 0.130 | 0.029 | 460.185 | 4.476 | 0.000 |
# (b) Phonotactics derived from NMS’ small active lexicon of Māori (comprising 121 common loanwords that most NMS can identify as Māori words in our previous study, plus 55 urban placenames)
# The model is not refitted here. The commented call below is kept for
# reference (presumably it produced the cached .rds file that is loaded).
# modelNMSLexicon <- lmer(
#   enteredResponse ~ c.(scoreSmall)*group + c.(scoreDict)*group + c.(scoreRs)*group + c.(scoreUnseg)*group +
#     (1 + c.(scoreSmall) + c.(scoreDict) + c.(scoreRs) + c.(scoreUnseg)|workerId) + (1+group|word),
#   control=lmerControl(optimizer="bobyqa"), dataExp2)
# saveRDS(modelNMSLexicon, file = "modelNMSLexicon.rds")
modelNMSLexicon <- readRDS("./modelNMSLexicon.rds")
# Render the fixed-effects coefficient table, rounded to three digits.
kable(
  xtable(summary(modelNMSLexicon)$coefficients),
  digits = 3,
  caption = "Table S6: Linear mixed-effects model of well-formedness ratings including the phonotactics derived from NMS’ small active lexicon of Māori"
)
| | Estimate | Std. Error | df | t value | Pr(>\|t\|) |
|---|---|---|---|---|---|
| (Intercept) | 2.798 | 0.052 | 279.286 | 53.636 | 0.000 |
| c.(scoreSmall) | 0.436 | 0.235 | 1411.175 | 1.858 | 0.063 |
| groupMS | 0.005 | 0.099 | 246.333 | 0.055 | 0.956 |
| groupUS | 0.327 | 0.076 | 256.037 | 4.318 | 0.000 |
| c.(scoreDict) | 0.587 | 0.076 | 581.840 | 7.699 | 0.000 |
| c.(scoreRs) | 0.136 | 0.029 | 1298.057 | 4.674 | 0.000 |
| c.(scoreUnseg) | -0.160 | 0.026 | 963.925 | -6.245 | 0.000 |
| c.(scoreSmall):groupMS | -0.352 | 0.244 | 346.084 | -1.444 | 0.150 |
| c.(scoreSmall):groupUS | -0.362 | 0.247 | 801.583 | -1.469 | 0.142 |
| groupMS:c.(scoreDict) | 0.066 | 0.118 | 245.848 | 0.563 | 0.574 |
| groupUS:c.(scoreDict) | -0.641 | 0.097 | 338.684 | -6.577 | 0.000 |
| groupMS:c.(scoreRs) | 0.007 | 0.032 | 305.144 | 0.217 | 0.828 |
| groupUS:c.(scoreRs) | -0.085 | 0.031 | 672.445 | -2.694 | 0.007 |
| groupMS:c.(scoreUnseg) | 0.061 | 0.031 | 232.214 | 1.967 | 0.050 |
| groupUS:c.(scoreUnseg) | 0.132 | 0.029 | 457.164 | 4.556 | 0.000 |
# (c) Phonotactics generalized over a list of function words
# The model is not refitted here. The commented call below is kept for
# reference (presumably it produced the cached .rds file that is loaded).
# modelFunction <- lmer(
#   enteredResponse ~ c.(scoreFunc)*group + c.(scoreDict)*group + c.(scoreRs)*group + c.(scoreUnseg)*group +
#     (1 + c.(scoreFunc) + c.(scoreDict) + c.(scoreRs) + c.(scoreUnseg)|workerId) + (1+group|word),
#   control=lmerControl(optimizer="bobyqa"), dataExp2)
# saveRDS(modelFunction, file = "modelFunction.rds")
modelFunction <- readRDS("./modelFunction.rds")
# Render the fixed-effects coefficient table, rounded to three digits.
kable(
  xtable(summary(modelFunction)$coefficients),
  digits = 3,
  caption = "Table S7: Linear mixed-effects model of well-formedness ratings including the phonotactics generalized over a list of function words"
)
| | Estimate | Std. Error | df | t value | Pr(>\|t\|) |
|---|---|---|---|---|---|
| (Intercept) | 2.798 | 0.052 | 279.340 | 53.651 | 0.000 |
| c.(scoreFunc) | 0.271 | 0.165 | 1377.414 | 1.642 | 0.101 |
| groupMS | 0.005 | 0.099 | 246.359 | 0.053 | 0.958 |
| groupUS | 0.327 | 0.076 | 256.071 | 4.314 | 0.000 |
| c.(scoreDict) | 0.629 | 0.069 | 526.485 | 9.073 | 0.000 |
| c.(scoreRs) | 0.134 | 0.029 | 1312.144 | 4.567 | 0.000 |
| c.(scoreUnseg) | -0.163 | 0.026 | 969.464 | -6.363 | 0.000 |
| c.(scoreFunc):groupMS | -0.010 | 0.171 | 322.338 | -0.059 | 0.953 |
| c.(scoreFunc):groupUS | -0.035 | 0.174 | 764.269 | -0.200 | 0.842 |
| groupMS:c.(scoreDict) | 0.026 | 0.110 | 241.969 | 0.235 | 0.814 |
| groupUS:c.(scoreDict) | -0.681 | 0.090 | 320.894 | -7.573 | 0.000 |
| groupMS:c.(scoreRs) | 0.001 | 0.032 | 307.824 | 0.021 | 0.983 |
| groupUS:c.(scoreRs) | -0.089 | 0.031 | 686.135 | -2.842 | 0.005 |
| groupMS:c.(scoreUnseg) | 0.062 | 0.031 | 233.516 | 1.975 | 0.049 |
| groupUS:c.(scoreUnseg) | 0.131 | 0.029 | 460.344 | 4.547 | 0.000 |
# (d) English phonotactics obtained from the English lexical database CELEX (Baayen, Piepenbrock & van Rijn, 1993)
# The model is not refitted here. The commented call below is kept for
# reference (presumably it produced the cached .rds file that is loaded).
# modelEng <- lmer(
#   enteredResponse ~ c.(scoreEng)*group + c.(scoreDict)*group + c.(scoreRs)*group + c.(scoreUnseg)*group +
#     (1 + c.(scoreEng) + c.(scoreDict) + c.(scoreRs) + c.(scoreUnseg)|workerId) + (1+group|word),
#   control=lmerControl(optimizer="bobyqa"), dataExp2)
# saveRDS(modelEng, file = "modelEng.rds")
modelEng <- readRDS("./modelEng.rds")
# Render the fixed-effects coefficient table, rounded to three digits.
kable(
  xtable(summary(modelEng)$coefficients),
  digits = 3,
  caption = "Table S8: Linear mixed-effects model of well-formedness ratings including the English phonotactics obtained from the English lexical database CELEX"
)
| | Estimate | Std. Error | df | t value | Pr(>\|t\|) |
|---|---|---|---|---|---|
| (Intercept) | 2.798 | 0.052 | 279.320 | 53.573 | 0.000 |
| c.(scoreEng) | -0.032 | 0.019 | 1319.566 | -1.751 | 0.080 |
| groupMS | 0.006 | 0.099 | 246.371 | 0.060 | 0.952 |
| groupUS | 0.328 | 0.076 | 256.012 | 4.333 | 0.000 |
| c.(scoreDict) | 0.641 | 0.069 | 530.596 | 9.339 | 0.000 |
| c.(scoreRs) | 0.143 | 0.029 | 1304.275 | 4.976 | 0.000 |
| c.(scoreUnseg) | -0.161 | 0.026 | 962.520 | -6.253 | 0.000 |
| c.(scoreEng):groupMS | 0.005 | 0.021 | 319.815 | 0.232 | 0.816 |
| c.(scoreEng):groupUS | -0.017 | 0.020 | 697.304 | -0.848 | 0.397 |
| groupMS:c.(scoreDict) | 0.024 | 0.109 | 243.111 | 0.218 | 0.828 |
| groupUS:c.(scoreDict) | -0.683 | 0.089 | 322.407 | -7.681 | 0.000 |
| groupMS:c.(scoreRs) | 0.000 | 0.032 | 307.677 | -0.012 | 0.990 |
| groupUS:c.(scoreRs) | -0.093 | 0.031 | 675.526 | -3.006 | 0.003 |
| groupMS:c.(scoreUnseg) | 0.062 | 0.031 | 232.117 | 1.986 | 0.048 |
| groupUS:c.(scoreUnseg) | 0.133 | 0.029 | 455.149 | 4.613 | 0.000 |
# (e) Word shape score obtained from a trigram language model built by identifying each segment as a consonant, vowel or long vowel and calculating probabilities over sequences of those categories
# The model is not refitted here. The commented call below is kept for
# reference (presumably it produced the cached .rds file that is loaded).
# modelWordshape <- lmer(
#   enteredResponse ~ c.(scoreCV)*group + c.(scoreDict)*group + c.(scoreRs)*group + c.(scoreUnseg)*group +
#     (1 + c.(scoreCV) + c.(scoreDict) + c.(scoreRs) + c.(scoreUnseg)|workerId) + (1+group|word),
#   control=lmerControl(optimizer="bobyqa"), dataExp2)
# saveRDS(modelWordshape, file = "modelWordshape.rds")
modelWordshape <- readRDS("./modelWordshape.rds")
# Render the fixed-effects coefficient table, rounded to three digits.
kable(
  xtable(summary(modelWordshape)$coefficients),
  digits = 3,
  caption = "Table S9: Linear mixed-effects model of well-formedness ratings including word shape scores"
)
| | Estimate | Std. Error | df | t value | Pr(>\|t\|) |
|---|---|---|---|---|---|
| (Intercept) | 2.785 | 0.053 | 280.210 | 52.977 | 0.000 |
| c.(scoreCV) | -0.019 | 0.026 | 555.046 | -0.713 | 0.476 |
| groupMS | -0.011 | 0.100 | 246.347 | -0.115 | 0.908 |
| groupUS | 0.323 | 0.076 | 255.726 | 4.232 | 0.000 |
| c.(scoreDict) | 0.671 | 0.070 | 973.649 | 9.629 | 0.000 |
| c.(scoreRs) | 0.139 | 0.029 | 1300.170 | 4.841 | 0.000 |
| c.(scoreUnseg) | -0.159 | 0.026 | 1005.330 | -6.169 | 0.000 |
| c.(scoreCV):groupMS | -0.038 | 0.042 | 253.720 | -0.920 | 0.358 |
| c.(scoreCV):groupUS | -0.059 | 0.034 | 333.169 | -1.748 | 0.081 |
| groupMS:c.(scoreDict) | 0.106 | 0.092 | 271.120 | 1.161 | 0.247 |
| groupUS:c.(scoreDict) | -0.545 | 0.081 | 464.426 | -6.753 | 0.000 |
| groupMS:c.(scoreRs) | -0.002 | 0.032 | 305.368 | -0.077 | 0.939 |
| groupUS:c.(scoreRs) | -0.094 | 0.031 | 650.843 | -3.080 | 0.002 |
| groupMS:c.(scoreUnseg) | 0.052 | 0.030 | 234.587 | 1.722 | 0.086 |
| groupUS:c.(scoreUnseg) | 0.105 | 0.028 | 465.731 | 3.718 | 0.000 |
# (f) Position-length specific probabilities of phonemes obtained from a unigram language model
# The model is not refitted here. The commented call below is kept for
# reference (presumably it produced the cached .rds file that is loaded).
# modelPosLength <- lmer(
#   enteredResponse ~ c.(scorePosLength)*group + c.(scoreDict)*group + c.(scoreRs)*group + c.(scoreUnseg)*group +
#     (1 + c.(scorePosLength) + c.(scoreDict) + c.(scoreRs) + c.(scoreUnseg)|workerId) + (1+group|word),
#   control=lmerControl(optimizer="bobyqa"), dataExp2)
# saveRDS(modelPosLength, file = "modelPosLength.rds")
modelPosLength <- readRDS("./modelPosLength.rds")
# Render the fixed-effects coefficient table, rounded to three digits.
kable(
  xtable(summary(modelPosLength)$coefficients),
  digits = 3,
  caption = "Table S10: Linear mixed-effects model of well-formedness ratings including position-length specific probabilities of phonemes"
)
| | Estimate | Std. Error | df | t value | Pr(>\|t\|) |
|---|---|---|---|---|---|
| (Intercept) | 2.809 | 0.054 | 276.178 | 52.130 | 0.000 |
| c.(scorePosLength) | -0.073 | 0.080 | 482.355 | -0.912 | 0.362 |
| groupMS | 0.021 | 0.102 | 245.116 | 0.202 | 0.840 |
| groupUS | 0.330 | 0.078 | 254.322 | 4.211 | 0.000 |
| c.(scoreDict) | 0.643 | 0.071 | 689.026 | 9.065 | 0.000 |
| c.(scoreRs) | 0.141 | 0.029 | 1260.914 | 4.936 | 0.000 |
| c.(scoreUnseg) | -0.153 | 0.025 | 966.605 | -6.100 | 0.000 |
| c.(scorePosLength):groupMS | -0.136 | 0.122 | 201.490 | -1.118 | 0.265 |
| c.(scorePosLength):groupUS | -0.076 | 0.101 | 277.885 | -0.758 | 0.449 |
| groupMS:c.(scoreDict) | 0.100 | 0.104 | 252.568 | 0.960 | 0.338 |
| groupUS:c.(scoreDict) | -0.624 | 0.088 | 374.400 | -7.088 | 0.000 |
| groupMS:c.(scoreRs) | -0.005 | 0.032 | 296.740 | -0.170 | 0.865 |
| groupUS:c.(scoreRs) | -0.095 | 0.031 | 648.074 | -3.071 | 0.002 |
| groupMS:c.(scoreUnseg) | 0.054 | 0.030 | 226.573 | 1.788 | 0.075 |
| groupUS:c.(scoreUnseg) | 0.122 | 0.028 | 456.681 | 4.323 | 0.000 |
# (g) Presence of 1 or more macrons
# Neither the predictor nor the model is recomputed here. The commented code
# below is kept for reference (presumably it produced the cached .rds file).
# dataExp2$macron <- FALSE
# dataExp2[grepl("ā|ē|ī|ō|ū",dataExp2$word),]$macron <- TRUE
# modelMacron <- lmer(
#   enteredResponse ~ c.(scoreDict)*group + c.(scoreRs)*group + c.(scoreUnseg)*group + macron*group +
#     (1 + c.(scoreDict) + c.(scoreRs) + c.(scoreUnseg) + macron|workerId) + (1+group|word),
#   control=lmerControl(optimizer="bobyqa"), dataExp2)
# saveRDS(modelMacron, file = "modelMacron.rds")
modelMacron <- readRDS("./modelMacron.rds")
# Render the fixed-effects coefficient table, rounded to three digits.
kable(
  xtable(summary(modelMacron)$coefficients),
  digits = 3,
  caption = "Table S11: Linear mixed-effects model of well-formedness ratings including the presence of macron(s)"
)
| | Estimate | Std. Error | df | t value | Pr(>\|t\|) |
|---|---|---|---|---|---|
| (Intercept) | 2.162 | 0.078 | 438.668 | 27.573 | 0.000 |
| c.(scoreDict) | 0.869 | 0.067 | 550.471 | 13.063 | 0.000 |
| groupMS | 0.202 | 0.133 | 258.319 | 1.525 | 0.128 |
| groupUS | 0.419 | 0.106 | 318.264 | 3.936 | 0.000 |
| c.(scoreRs) | 0.114 | 0.027 | 1220.609 | 4.185 | 0.000 |
| c.(scoreUnseg) | -0.134 | 0.024 | 879.359 | -5.484 | 0.000 |
| macronTRUE | 0.722 | 0.074 | 536.075 | 9.808 | 0.000 |
| c.(scoreDict):groupMS | -0.033 | 0.105 | 252.409 | -0.313 | 0.754 |
| c.(scoreDict):groupUS | -0.709 | 0.087 | 346.916 | -8.165 | 0.000 |
| groupMS:c.(scoreRs) | 0.007 | 0.031 | 307.679 | 0.210 | 0.834 |
| groupUS:c.(scoreRs) | -0.087 | 0.031 | 672.274 | -2.842 | 0.005 |
| groupMS:c.(scoreUnseg) | 0.056 | 0.031 | 233.393 | 1.808 | 0.072 |
| groupUS:c.(scoreUnseg) | 0.124 | 0.029 | 455.002 | 4.346 | 0.000 |
| groupMS:macronTRUE | -0.212 | 0.118 | 264.868 | -1.788 | 0.075 |
| groupUS:macronTRUE | -0.101 | 0.097 | 354.016 | -1.044 | 0.297 |
# (h) Presence of 1 or more digraph(s) such as 'wh' and 'ng'
# digraph is TRUE for every row whose word contains "wh" or "ng".
# The logical vector from grepl() is assigned directly: the earlier
# "initialise to FALSE, then set df[mask, ]$col <- TRUE" form fails with a
# replacement-length error when no row matches.
dataExp2$digraph <- grepl("wh|ng", dataExp2$word)
# The model is not refitted here. The commented call below is kept for
# reference (presumably it produced the cached .rds file that is loaded).
# modelDigraph <- lmer(
#   enteredResponse ~ c.(scoreDict)*group + c.(scoreRs)*group + c.(scoreUnseg)*group + digraph*group +
#     (1 + c.(scoreDict) + c.(scoreRs) + c.(scoreUnseg) + digraph|workerId) + (1+group|word),
#   control=lmerControl(optimizer="bobyqa"), dataExp2)
# saveRDS(modelDigraph, file = "modelDigraph.rds")
modelDigraph <- readRDS("./modelDigraph.rds")
# Render the fixed-effects coefficient table, rounded to three digits.
kable(
  xtable(summary(modelDigraph)$coefficients),
  digits = 3,
  caption = "Table S12: Linear mixed-effects model of well-formedness ratings including the presence of digraph(s)"
)
| | Estimate | Std. Error | df | t value | Pr(>\|t\|) |
|---|---|---|---|---|---|
| (Intercept) | 2.736 | 0.054 | 295.528 | 50.933 | 0.000 |
| c.(scoreDict) | 0.643 | 0.069 | 521.557 | 9.376 | 0.000 |
| groupMS | 0.032 | 0.100 | 247.138 | 0.317 | 0.751 |
| groupUS | 0.435 | 0.077 | 259.639 | 5.637 | 0.000 |
| c.(scoreRs) | 0.153 | 0.029 | 1287.519 | 5.357 | 0.000 |
| c.(scoreUnseg) | -0.138 | 0.026 | 974.560 | -5.401 | 0.000 |
| digraphTRUE | 0.176 | 0.042 | 673.956 | 4.187 | 0.000 |
| c.(scoreDict):groupMS | 0.025 | 0.109 | 242.887 | 0.231 | 0.818 |
| c.(scoreDict):groupUS | -0.687 | 0.088 | 310.433 | -7.776 | 0.000 |
| groupMS:c.(scoreRs) | -0.001 | 0.032 | 303.753 | -0.028 | 0.978 |
| groupUS:c.(scoreRs) | -0.106 | 0.030 | 620.529 | -3.497 | 0.001 |
| groupMS:c.(scoreUnseg) | 0.053 | 0.031 | 235.953 | 1.705 | 0.090 |
| groupUS:c.(scoreUnseg) | 0.090 | 0.028 | 437.187 | 3.202 | 0.001 |
| groupMS:digraphTRUE | -0.076 | 0.061 | 238.607 | -1.250 | 0.213 |
| groupUS:digraphTRUE | -0.314 | 0.051 | 345.714 | -6.187 | 0.000 |
# (i) Maximum length of vowel sequence
# Add the maximum length of vowel sequence: for every word, the number of
# characters in its longest uninterrupted run of vowels (short or long).
# The runs are located and measured directly, so the value is not capped at
# five vowels and does not depend on comparing digit strings. For words whose
# longest run has at most five vowels the result equals the earlier
# digit-substitution approach.
vSeqWords <- as.character(dataExp2$word)
# One character vector of vowel runs per word (empty when a word has no vowel)
vSeqList <- regmatches(vSeqWords, gregexpr("[aeiouāēīōū]+", vSeqWords))
vSeqList1 <- vapply(
  vSeqList,
  function(runs) {
    # A word without any vowel has a longest vowel run of length 0
    if (length(runs) == 0L) {
      return(0)
    }
    as.numeric(max(nchar(runs)))
  },
  numeric(1)
)
# A missing word has no defined vowel sequence
vSeqList1[is.na(vSeqWords)] <- NA_real_
dataExp2$vSeq <- vSeqList1
# The fitting and saving calls below are commented out; a previously saved fit
# is loaded from disk instead.
# modelVSeq <- lmer(enteredResponse ~ c.(scoreDict)*group + c.(scoreRs)*group + c.(scoreUnseg)*group + c.(vSeq)*group + (1 + c.(scoreDict) + c.(scoreRs) + c.(scoreUnseg) + c.(vSeq)|workerId) + (1+group|word), control=lmerControl(optimizer="bobyqa"), dataExp2)
# saveRDS(modelVSeq, file = "modelVSeq.rds")
modelVSeq <- readRDS("./modelVSeq.rds")
# Print the coefficient table of the model summary, rounded to three decimals.
kable(
  xtable(summary(modelVSeq)$coefficients),
  digits = 3,
  caption = "Table S13: Linear mixed-effects model of well-formedness ratings including the maximum length of vowel sequence"
)
| | Estimate | Std. Error | df | t value | Pr(>\|t\|) |
|---|---|---|---|---|---|
| (Intercept) | 2.798 | 0.052 | 274.315 | 54.264 | 0.000 |
| c.(scoreDict) | 0.466 | 0.065 | 522.340 | 7.203 | 0.000 |
| groupMS | 0.005 | 0.098 | 246.253 | 0.053 | 0.958 |
| groupUS | 0.328 | 0.075 | 255.298 | 4.364 | 0.000 |
| c.(scoreRs) | 0.114 | 0.026 | 1253.653 | 4.303 | 0.000 |
| c.(scoreUnseg) | -0.074 | 0.023 | 1092.396 | -3.288 | 0.001 |
| c.(vSeq) | -0.324 | 0.028 | 536.630 | -11.584 | 0.000 |
| c.(scoreDict):groupMS | 0.127 | 0.103 | 244.729 | 1.234 | 0.218 |
| c.(scoreDict):groupUS | -0.554 | 0.085 | 329.474 | -6.552 | 0.000 |
| groupMS:c.(scoreRs) | 0.021 | 0.030 | 309.210 | 0.695 | 0.488 |
| groupUS:c.(scoreRs) | -0.066 | 0.029 | 683.144 | -2.251 | 0.025 |
| groupMS:c.(scoreUnseg) | 0.012 | 0.025 | 239.385 | 0.471 | 0.638 |
| groupUS:c.(scoreUnseg) | 0.067 | 0.025 | 554.971 | 2.671 | 0.008 |
| groupMS:c.(vSeq) | 0.180 | 0.045 | 253.257 | 4.049 | 0.000 |
| groupUS:c.(vSeq) | 0.240 | 0.037 | 339.698 | 6.584 | 0.000 |
# (j) Type of first segment (consonant or vowel)
# Recode every vowel as "v" and every consonant letter as "c", keep only the
# first character of the recoded word, and store it as a factor.
dataExp2$first <- factor(
  substr(
    gsub("[whkmrngpt]", "c", gsub("[aeiouāēīōū]", "v", dataExp2$word)),
    1, 1
  )
)
# The fitting and saving calls below are commented out; a previously saved fit
# is loaded from disk instead.
# modelFirst <- lmer(enteredResponse ~ c.(scoreDict)*group + c.(scoreRs)*group + c.(scoreUnseg)*group + first*group + (1 + c.(scoreDict) + c.(scoreRs) + c.(scoreUnseg) + first|workerId) + (1+group|word), control=lmerControl(optimizer="bobyqa"), dataExp2)
# saveRDS(modelFirst, file = "modelFirst.rds")
modelFirst <- readRDS("./modelFirst.rds")
# Print the coefficient table of the model summary, rounded to three decimals.
kable(
  xtable(summary(modelFirst)$coefficients),
  digits = 3,
  caption = "Table S14: Linear mixed-effects model of well-formedness ratings including the type of first segment"
)
| | Estimate | Std. Error | df | t value | Pr(>\|t\|) |
|---|---|---|---|---|---|
| (Intercept) | 3.014 | 0.056 | 319.154 | 53.366 | 0.000 |
| c.(scoreDict) | 0.515 | 0.065 | 543.173 | 7.903 | 0.000 |
| groupMS | -0.063 | 0.104 | 248.565 | -0.609 | 0.543 |
| groupUS | 0.148 | 0.080 | 269.415 | 1.849 | 0.066 |
| c.(scoreRs) | 0.110 | 0.027 | 1260.092 | 3.989 | 0.000 |
| c.(scoreUnseg) | -0.101 | 0.024 | 1029.135 | -4.212 | 0.000 |
| firstv | -0.378 | 0.043 | 565.255 | -8.716 | 0.000 |
| c.(scoreDict):groupMS | 0.108 | 0.103 | 244.910 | 1.049 | 0.295 |
| c.(scoreDict):groupUS | -0.568 | 0.084 | 329.468 | -6.745 | 0.000 |
| groupMS:c.(scoreRs) | 0.011 | 0.031 | 300.282 | 0.342 | 0.733 |
| groupUS:c.(scoreRs) | -0.061 | 0.030 | 655.438 | -2.030 | 0.043 |
| groupMS:c.(scoreUnseg) | 0.028 | 0.028 | 239.674 | 1.013 | 0.312 |
| groupUS:c.(scoreUnseg) | 0.077 | 0.027 | 499.843 | 2.892 | 0.004 |
| groupMS:firstv | 0.150 | 0.068 | 245.512 | 2.214 | 0.028 |
| groupUS:firstv | 0.322 | 0.056 | 335.226 | 5.783 | 0.000 |
# (k) Variant phonotactic scores including trigrams, bigrams, and unigrams
# The fitting and saving calls below are commented out; a previously saved fit
# is loaded from disk instead.
# modelNewScores <- lmer(enteredResponse ~ c.(scoreDictNew)*group + c.(scoreRsNew)*group + c.(scoreUnsegNew)*group + (1 + c.(scoreDictNew) + c.(scoreRsNew) + c.(scoreUnsegNew)|workerId) + (1+group|word), control=lmerControl(optimizer="bobyqa"), dataExp2)
# saveRDS(modelNewScores, file = "modelNewScores.rds")
modelNewScores <- readRDS("./modelNewScores.rds")
# Print the coefficient table of the model summary, rounded to three decimals.
kable(
  xtable(summary(modelNewScores)$coefficients),
  digits = 3,
  caption = "Table S15: Linear mixed-effects model of well-formedness ratings including variant phonotactic scores with trigrams, bigrams, and unigrams."
)
| | Estimate | Std. Error | df | t value | Pr(>\|t\|) |
|---|---|---|---|---|---|
| (Intercept) | 2.787 | 0.052 | 274.287 | 53.212 | 0.000 |
| c.(scoreDictNew) | 0.809 | 0.077 | 669.797 | 10.564 | 0.000 |
| groupMS | -0.007 | 0.100 | 245.716 | -0.068 | 0.946 |
| groupUS | 0.321 | 0.076 | 255.204 | 4.208 | 0.000 |
| c.(scoreRsNew) | 0.285 | 0.038 | 1262.070 | 7.534 | 0.000 |
| c.(scoreUnsegNew) | -0.267 | 0.032 | 827.698 | -8.311 | 0.000 |
| c.(scoreDictNew):groupMS | -0.002 | 0.113 | 245.741 | -0.018 | 0.986 |
| c.(scoreDictNew):groupUS | -0.709 | 0.096 | 373.479 | -7.388 | 0.000 |
| groupMS:c.(scoreRsNew) | -0.071 | 0.043 | 316.985 | -1.650 | 0.100 |
| groupUS:c.(scoreRsNew) | -0.241 | 0.042 | 691.138 | -5.724 | 0.000 |
| groupMS:c.(scoreUnsegNew) | 0.089 | 0.042 | 246.342 | 2.117 | 0.035 |
| groupUS:c.(scoreUnsegNew) | 0.162 | 0.038 | 442.054 | 4.248 | 0.000 |
# (l) Phonotactic scores without the distinction between short and long vowels marked with macrons
# The fitting and saving calls below are commented out; a previously saved fit
# is loaded from disk instead.
# modelNoMacron <- lmer(enteredResponse ~ c.(scoreDictNoMacron)*group + c.(scoreRsNoMacron)*group + c.(scoreUnsegNoMacron)*group + (1 + c.(scoreDictNoMacron) + c.(scoreRsNoMacron) + c.(scoreUnsegNoMacron)|workerId) + (1+group|word), control=lmerControl(optimizer="bobyqa"), dataExp2)
# saveRDS(modelNoMacron, file = "modelNoMacron.rds")
modelNoMacron <- readRDS("./modelNoMacron.rds")
# Print the coefficient table of the model summary, rounded to three decimals.
kable(
  xtable(summary(modelNoMacron)$coefficients),
  digits = 3,
  caption = "Table S16: Linear mixed-effects model of well-formedness ratings including phonotactic scores without the distinction between short and long vowels marked with macrons"
)
| | Estimate | Std. Error | df | t value | Pr(>\|t\|) |
|---|---|---|---|---|---|
| (Intercept) | 2.797 | 0.052 | 268.601 | 54.070 | 0.000 |
| c.(scoreDictNoMacron) | 1.180 | 0.082 | 611.118 | 14.460 | 0.000 |
| groupMS | 0.002 | 0.099 | 246.574 | 0.024 | 0.981 |
| groupUS | 0.329 | 0.076 | 254.942 | 4.343 | 0.000 |
| c.(scoreRsNoMacron) | 0.130 | 0.044 | 1241.548 | 2.958 | 0.003 |
| c.(scoreUnsegNoMacron) | -0.310 | 0.039 | 599.160 | -7.937 | 0.000 |
| c.(scoreDictNoMacron):groupMS | -0.159 | 0.126 | 263.149 | -1.259 | 0.209 |
| c.(scoreDictNoMacron):groupUS | -0.901 | 0.106 | 388.697 | -8.466 | 0.000 |
| groupMS:c.(scoreRsNoMacron) | -0.012 | 0.051 | 356.790 | -0.240 | 0.810 |
| groupUS:c.(scoreRsNoMacron) | -0.088 | 0.051 | 775.953 | -1.734 | 0.083 |
| groupMS:c.(scoreUnsegNoMacron) | 0.053 | 0.059 | 248.792 | 0.891 | 0.374 |
| groupUS:c.(scoreUnsegNoMacron) | 0.313 | 0.051 | 378.573 | 6.185 | 0.000 |